Imports¶
In [1]:
!pip install mapclassify
!pip install geopandas
!pip install factor_analyzer
!pip install plotnine
Requirement already satisfied: mapclassify, geopandas, factor_analyzer and plotnine (and their dependencies) in c:\users\pablo-pc\anaconda3\lib\site-packages
In [2]:
import pandas as pd
import numpy as np
from tabulate import tabulate
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import plotnine
from plotnine import *
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler, scale
from scipy import stats
import warnings
warnings.filterwarnings("ignore")
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
plt.rcParams['axes.labelsize'] = 10
plt.rcParams['xtick.labelsize'] = 10
plt.rcParams['ytick.labelsize'] = 10
In [3]:
df = pd.read_csv('02_componentes_minmax.csv')
Functions¶
In [4]:
# Definir la función generalized_mean
def generalized_mean(components, beta):
EPSILON = 1e-8 # Pequeño valor constante
if beta != 0:
return np.power(np.mean(np.power(components + EPSILON, beta)), 1 / beta)
else:
return np.power(np.prod(components + EPSILON), 1 / len(components))
# Definir la función de normalización Min-Max
def min_max_normalize(series):
return ((series - series.min()) / (series.max() - series.min())) * 100
# Calcular el intervalo de confianza al 95% para cada columna
def calcular_intervalo_confianza(data):
mean = np.mean(data)
std_error = np.std(data, ddof=1) / np.sqrt(len(data))
margin_of_error = 1.96 * std_error # 1.96 es el valor crítico para un intervalo de confianza del 95%
return mean, mean - margin_of_error, mean + margin_of_error
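A quick illustrative check of generalized_mean, run after the definitions above (the vector below is made up, not project data): with beta = 1 the formula reduces to the arithmetic mean, with beta → 0 it approaches the geometric mean, and with beta = -1 it gives the harmonic mean, so lower beta values pull the aggregate toward the weakest component.
# Illustrative only: hypothetical component scores on the 0-100 scale
demo = np.array([40.0, 60.0, 80.0])
for b in [-1, 0, 0.5, 1, 2]:
    print(f'beta={b}: {generalized_mean(demo, b):.2f}')
# The printed values increase with beta (harmonic <= geometric <= arithmetic mean)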
In [5]:
# Función para realizar bootstrap: sensibilidad e incertidumbre
def bootstrap_analysis(X, beta, n_bootstrap=1000):
n_rows = X.shape[0]
# Función para calcular el índice compuesto de una muestra
def compute_index(sample):
return generalized_mean(sample, beta)
# Lista para almacenar resultados de bootstrap
bootstrap_results = []
for _ in range(n_bootstrap):
# Muestreo con reemplazo
sample_indices = np.random.choice(X.index, size=n_rows, replace=True)
sample = X.loc[sample_indices]
# Asegurarse de que los valores están en el rango [0, 100]
sample = np.clip(sample, 0, 100)
# Calcular índice compuesto para la muestra
index_values = sample.apply(lambda row: compute_index(row.values), axis=1)
bootstrap_results.extend(index_values)
# Convertir resultados bootstrap a una Serie
bootstrap_series = pd.Series(bootstrap_results)
# Calcular varianza
variance = bootstrap_series.var()
# Calcular intervalo de confianza
mean, lower_bound, upper_bound = calcular_intervalo_confianza(bootstrap_series)
# Crear DataFrame con los resultados de bootstrap
bootstrap_df = pd.DataFrame({
'Bootstrap Index': bootstrap_series
})
# Crear DataFrame con las cifras de varianza y intervalo de confianza
results = pd.DataFrame({
'Variance': [variance],
'Confidence Interval Mean': [mean],
'Confidence Interval Lower': [lower_bound],
'Confidence Interval Upper': [upper_bound]
})
return bootstrap_df, results
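A minimal usage sketch for bootstrap_analysis on a tiny hypothetical DataFrame (names and values invented, not the project data): bootstrap_df holds one composite-index value per resampled row across all replicates, and results holds the variance and the 95% confidence interval of that bootstrap distribution.
# Hypothetical example, already on the 0-100 scale the function expects
toy = pd.DataFrame({'comp_a': [70.0, 55.0, 90.0],
                    'comp_b': [60.0, 65.0, 80.0]},
                   index=['R1', 'R2', 'R3'])
toy_boot, toy_summary = bootstrap_analysis(toy, beta=0.5, n_bootstrap=100)
print(toy_boot.shape)   # (300, 1): 3 rows x 100 bootstrap replicates
print(toy_summary)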
In [6]:
# Visualización barras
def plot_bar_chart(df, variables, id_var='country', hue_var='variable', palette="Set2", errorbar='ci'):
"""
Genera un gráfico de barras con las variables especificadas.
Parameters:
df (pd.DataFrame): DataFrame que contiene los datos.
variables (list): Lista de columnas a utilizar como variables.
id_var (str): Columna que identifica las categorías (por defecto 'country').
hue_var (str): Nombre de la variable de agrupación (por defecto 'variable').
palette (str): Paleta de colores a utilizar (por defecto 'Set2').
errorbar (str): Tipo de barra de error a mostrar (por defecto 'ci').
Returns:
None
"""
sns.set(style="whitegrid")
# Configurar el tamaño del gráfico
plt.figure(figsize=(12, 8))
# Crear el gráfico de barras
melted_df = pd.melt(df, id_vars=[id_var], value_vars=variables)
sns.barplot(x=id_var, y='value', hue=hue_var, data=melted_df, palette=palette, errorbar=errorbar)
# Añadir título y etiquetas
plt.xlabel(id_var.capitalize())
plt.ylabel('Value')
plt.title('Bar Chart')
# Rotar las etiquetas del eje x para mejor visualización
plt.xticks(rotation=90)
# Ajustar el diseño para evitar solapamiento
plt.tight_layout()
# Mostrar el gráfico
plt.show()
# Boxplot
def plot_box_chart(df, variables, id_var='country', hue_var='variable', palette="Set2"):
"""
Genera un gráfico de caja con las variables especificadas.
Parameters:
df (pd.DataFrame): DataFrame que contiene los datos.
variables (list): Lista de columnas a utilizar como variables.
id_var (str): Columna que identifica las categorías (por defecto 'country').
hue_var (str): Nombre de la variable de agrupación (por defecto 'variable').
palette (str): Paleta de colores a utilizar (por defecto 'Set2').
Returns:
None
"""
sns.set(style="whitegrid")
# Configurar el tamaño del gráfico
plt.figure(figsize=(12, 8))
# Crear el gráfico de caja
melted_df = pd.melt(df, id_vars=[id_var], value_vars=variables)
sns.boxplot(x=id_var, y='value', hue=hue_var, data=melted_df, palette=palette)
# Añadir título y etiquetas
plt.xlabel(id_var.capitalize())
plt.ylabel('Value')
plt.title('Box Plot')
# Rotar las etiquetas del eje x para mejor visualización
plt.xticks(rotation=90)
# Ajustar el diseño para evitar solapamiento
plt.tight_layout()
# Mostrar el gráfico
plt.show()
# Mapas
def plot_maps(df, variables, region_var='region', exclude_regions=None, color='lightgrey', scheme='natural_breaks', k=8, cmap='BrBG'):
"""
Genera subplots de mapas para las variables especificadas.
Parameters:
df (gpd.GeoDataFrame): GeoDataFrame que contiene los datos.
variables (list): Lista de columnas a utilizar como variables.
region_var (str): Columna que identifica las regiones (por defecto 'region').
exclude_regions (list): Lista de regiones a excluir del análisis (por defecto None).
color (str): Color de fondo de la figura (por defecto 'lightgrey').
scheme (str): Esquema de clasificación de colores (por defecto 'natural_breaks').
k (int): Número de intervalos para la clasificación (por defecto 8).
cmap (str): Paleta de colores a utilizar (por defecto 'BrBG').
Returns:
None
"""
if exclude_regions is not None:
df = df[~df[region_var].isin(exclude_regions)]
# Crear la figura y los ejes (subplots)
fig, axes = plt.subplots(1, len(variables), figsize=(18, 6)) # Ajustar el número de columnas según las variables
# Si solo hay un gráfico, hacer que 'axes' sea una lista
if len(variables) == 1:
axes = [axes]
# Configurar el color de fondo de la figura
fig.patch.set_facecolor(color)
# Iterar sobre cada eje y crear el gráfico correspondiente
for i, var in enumerate(variables):
ax = axes[i]
df.plot(column=var,
scheme=scheme,
k=k, cmap=cmap,
edgecolor='w',
legend=True,
ax=ax,
legend_kwds={'loc': 'upper left', 'fontsize': 8, 'title': var}
)
ax.set_title(var) # Título de cada subplot
ax.set_axis_off()
plt.tight_layout()
plt.show()
Dimensions¶
- Create a data frame to store the indices built at the dimension level
In [7]:
dimensiones_df = df[['country', 'nuts_code', 'region']].copy()
dimensiones_df.set_index('nuts_code', inplace=True)
- Add geographic data so the results can be shown on maps
In [8]:
# Cargar los shapefiles de las regiones NUTS
shapefile_path = 'NUTS_RG_20M_2021_3035.shp'
gdf = gpd.read_file(shapefile_path)
In [9]:
dimensiones_gdf=pd.merge(gdf[['NUTS_ID','geometry']], dimensiones_df, how='right', left_on='NUTS_ID', right_on='nuts_code')
dimensiones_gdf = dimensiones_gdf.rename(columns={'NUTS_ID': 'nuts_code'})
In [10]:
overseas = ['Guyane', 'Guadeloupe', 'Martinique', 'La Réunion', 'Mayotte', "Região Autónoma dos Açores", 'Região Autónoma da Madeira', 'Canarias']
- Group the components into dimensions, following the European Commission structure
In [11]:
basic_needs = ["nutrition_medical_care",
'water_sanitation',
'safety']
foundations_wellbeing = ['basic_education',
'info_comm',
'health',
'env_quality']
opportunity = ['trust_gov',
'freedom_choice',
'inclusive_society',
'adv_education']
Basic needs¶
In [12]:
# basic_needs
data = {
'nuts_code': df['nuts_code']}
for var in basic_needs:
data[var] = df[var]
X = pd.DataFrame(data)
X.set_index('nuts_code', inplace=True)
In [13]:
# Agregación
beta = 0.5
X['basic_needs'] = X.apply(lambda row: generalized_mean(row.values, beta), axis=1)
# Añadir a dimensiones
subset = X['basic_needs']
dimensiones_df = pd.merge(dimensiones_df, subset, on='nuts_code')
dimensiones_gdf = pd.merge(dimensiones_gdf, subset, on='nuts_code')
[Figura 03.1]
In [14]:
print('Figura 03.1')
variables = ['basic_needs']
plot_bar_chart(dimensiones_df, variables, palette="Pastel2")
plot_box_chart(dimensiones_df, variables, palette="Pastel1")
plot_maps(dimensiones_gdf, variables, exclude_regions=overseas, scheme='quantiles')
Figura 03.1
In [15]:
plot_maps(dimensiones_gdf, variables, exclude_regions=overseas, scheme='stdmean')
- Sensitivity analysis
[Figura 03.2] Analysis of different beta values (lower beta values allow less compensation between components, so the aggregate is pulled toward the weakest component)
In [16]:
print('Figura 03.2')
data = {
'nuts_code': df['nuts_code'],
}
for var in basic_needs:
data[var] = df[var]
X_test = pd.DataFrame(data)
X_test.set_index('nuts_code', inplace=True)
# Valores de beta a probar
beta_values = [-1, 0, 0.5, 1, 2]
# Almacenar los resultados
sensitivity_results = pd.DataFrame(index=X_test.index)
for beta in beta_values:
sensitivity_results[f'basic_needs{beta}'] = X_test.apply(lambda row: generalized_mean(row.values, beta), axis=1)
# Graficar los resultados de sensibilidad para diferentes valores de beta =====================================================
plt.figure(figsize=(10, 6))
for beta in beta_values:
sns.kdeplot(sensitivity_results[f'basic_needs{beta}'], label=f'beta={beta}', fill=False)
plt.title('Distribución para diferentes valores de Beta')
plt.xlabel('Dimension')
plt.ylabel('Densidad')
plt.legend()
# Guardar la figura en un archivo PNG
plt.savefig('graficos/analisis_betas_basic_needs.png', dpi=300)
plt.show()
Figura 03.2
[Figura 03.3]
In [17]:
print('Figura 03.3')
print()
n_bootstrap=1000
beta = 0.5
# Realizar análisis bootstrap
bootstrap_df, results = bootstrap_analysis(X, beta, n_bootstrap=n_bootstrap)
print("Bootstrap:")
print(tabulate(results, headers='keys', tablefmt='pretty'))
print()
print('Datos originales:')
# Datos originales
varianza = subset.var()
intervalo_confianza = calcular_intervalo_confianza(subset)
print("Varianzas:")
print(varianza)
print("\nIntervalo de Confianza al 95%:")
print(intervalo_confianza)
print()
# Distribución
X['Indice']='Original'
bootstrap_df['Indice']='Bootstrap'
X_combined = X[['basic_needs', 'Indice']].copy()
bootstrap_combined = bootstrap_df[['Bootstrap Index', 'Indice']].copy()
# Combinar ambos DataFrames
combined_df = pd.concat([X_combined, bootstrap_combined], axis=0)
# Crear la gráfica
plt.figure(figsize=(12, 6))
# Gráfico de densidad para los índices originales y bootstrap
sns.kdeplot(data=combined_df[combined_df['Indice'] == 'Original'], x='basic_needs', label='Original', fill=True, common_norm=False)
sns.kdeplot(data=combined_df[combined_df['Indice'] == 'Bootstrap'], x='Bootstrap Index', label='Bootstrap', fill=True, common_norm=False)
# Configurar título y etiquetas
plt.title('Distribución del Índice: Original vs. Bootstrap')
plt.xlabel('Índice Compuesto')
plt.ylabel('Densidad')
# Mostrar leyenda
plt.legend(title='Tipo de Índice')
# Guardar la figura en un archivo PNG
plt.savefig('graficos/bootstrap_basic_needs.png', dpi=300)
# Mostrar la gráfica
plt.show()
Figura 03.3

Bootstrap:
+---+--------------------+--------------------------+---------------------------+---------------------------+
|   |      Variance      | Confidence Interval Mean | Confidence Interval Lower | Confidence Interval Upper |
+---+--------------------+--------------------------+---------------------------+---------------------------+
| 0 | 187.41086562104996 |    68.59462830164252     |     68.54008445388702     |     68.64917214939801     |
+---+--------------------+--------------------------+---------------------------+---------------------------+

Datos originales:
Varianzas:
188.26885568043105

Intervalo de Confianza al 95%:
(68.59297293678928, 66.86420129531805, 70.3217445782605)
Foundations of wellbeing¶
In [18]:
# foundations_wellbeing
data = {'nuts_code': df['nuts_code']}
for var in foundations_wellbeing:
data[var] = df[var]
X = pd.DataFrame(data)
X.set_index('nuts_code', inplace=True)
[Figura 03.4]
In [19]:
print('Figura 03.4')
# Agregación
beta = 0.5
X['foundations_wellbeing'] = X.apply(lambda row: generalized_mean(row.values, beta), axis=1)
# Añadir a dimensiones
subset = X['foundations_wellbeing']
dimensiones_df = pd.merge(dimensiones_df, subset, on='nuts_code')
dimensiones_gdf = pd.merge(dimensiones_gdf, subset, on='nuts_code')
# Graficos y mapas
variables = ['foundations_wellbeing']
plot_bar_chart(dimensiones_df, variables, palette="Pastel2")
plot_box_chart(dimensiones_df, variables, palette="Pastel1")
plot_maps(dimensiones_gdf, variables, exclude_regions=overseas, scheme='quantiles')
Figura 03.4
In [20]:
plot_maps(dimensiones_gdf, variables, exclude_regions=overseas, scheme='stdmean')
- Sensitivity analysis
[Figura 03.5] Analysis of different beta values
In [21]:
print('Figura 03.5')
data = {
'nuts_code': df['nuts_code'],
}
for var in foundations_wellbeing:
data[var] = df[var]
X_test = pd.DataFrame(data)
X_test.set_index('nuts_code', inplace=True)
# Valores de beta a probar
beta_values = [-1, 0, 0.5, 1, 2]
# Almacenar los resultados
sensitivity_results = pd.DataFrame(index=X_test.index)
for beta in beta_values:
sensitivity_results[f'foundations_wellbeing{beta}'] = X_test.apply(lambda row: generalized_mean(row.values, beta), axis=1)
# Graficar los resultados de sensibilidad para diferentes valores de beta =====================================================
plt.figure(figsize=(10, 6))
for beta in beta_values:
sns.kdeplot(sensitivity_results[f'foundations_wellbeing{beta}'], label=f'beta={beta}', fill=False)
plt.title('Distribución para diferentes valores de Beta')
plt.xlabel('Dimension')
plt.ylabel('Densidad')
plt.legend()
# Guardar la figura en un archivo PNG
plt.savefig('graficos/analisis_betas_foundations_wellbeing.png', dpi=300)
plt.show()
Figura 03.5
[Figura 03.6]
In [22]:
print('Figura 03.6')
print()
n_bootstrap=1000
beta = 0.5
# Realizar análisis bootstrap
bootstrap_df, results = bootstrap_analysis(X, beta, n_bootstrap=n_bootstrap)
print("Bootstrap:")
print(tabulate(results, headers='keys', tablefmt='pretty'))
print()
print('Datos originales:')
# Datos originales
varianza = subset.var()
intervalo_confianza = calcular_intervalo_confianza(subset)
print("Varianzas:")
print(varianza)
print("\nIntervalo de Confianza al 95%:")
print(intervalo_confianza)
print()
# Distribución
X['Indice']='Original'
bootstrap_df['Indice']='Bootstrap'
X_combined = X[['foundations_wellbeing', 'Indice']].copy()
bootstrap_combined = bootstrap_df[['Bootstrap Index', 'Indice']].copy()
# Combinar ambos DataFrames
combined_df = pd.concat([X_combined, bootstrap_combined], axis=0)
# Crear la gráfica
plt.figure(figsize=(12, 6))
# Gráfico de densidad para los índices originales y bootstrap
sns.kdeplot(data=combined_df[combined_df['Indice'] == 'Original'], x='foundations_wellbeing', label='Original', fill=True, common_norm=False)
sns.kdeplot(data=combined_df[combined_df['Indice'] == 'Bootstrap'], x='Bootstrap Index', label='Bootstrap', fill=True, common_norm=False)
# Configurar título y etiquetas
plt.title('Distribución del Índice: Original vs. Bootstrap')
plt.xlabel('Índice Compuesto')
plt.ylabel('Densidad')
# Mostrar leyenda
plt.legend(title='Tipo de Índice')
# Mostrar la gráfica
plt.show()
Figura 03.6

Bootstrap:
+---+--------------------+--------------------------+---------------------------+---------------------------+
|   |      Variance      | Confidence Interval Mean | Confidence Interval Lower | Confidence Interval Upper |
+---+--------------------+--------------------------+---------------------------+---------------------------+
| 0 | 222.90609823338784 |    65.73184061010352     |     65.67235535021068     |     65.79132586999636     |
+---+--------------------+--------------------------+---------------------------+---------------------------+

Datos originales:
Varianzas:
222.89733081642086

Intervalo de Confianza al 95%:
(65.7500851183707, 63.869033027861754, 67.63113720887966)
Opportunity¶
In [23]:
# opportunity
data = {'nuts_code': df['nuts_code']}
for var in opportunity:
data[var] = df[var]
X = pd.DataFrame(data)
X.set_index('nuts_code', inplace=True)
[Figura 03.7]
In [24]:
print('Figura 03.7')
# Agregación
beta = 0.5
X['opportunity'] = X.apply(lambda row: generalized_mean(row.values, beta), axis=1)
# Añadir a dimensiones
subset = X['opportunity']
dimensiones_df = pd.merge(dimensiones_df, subset, on='nuts_code')
dimensiones_gdf = pd.merge(dimensiones_gdf, subset, on='nuts_code')
# Graficos y mapas
variables = ['opportunity']
plot_bar_chart(dimensiones_df, variables, palette="Pastel2")
plot_box_chart(dimensiones_df, variables, palette="Pastel1")
plot_maps(dimensiones_gdf, variables, exclude_regions=overseas, scheme='quantiles')
plot_maps(dimensiones_gdf, variables, exclude_regions=overseas, scheme='stdmean')
Figura 03.7
- Sensitivity analysis
[Figura 03.8] Analysis of different beta values
In [25]:
print('Figura 03.8')
data = {
'nuts_code': df['nuts_code'],
}
for var in opportunity:
data[var] = df[var]
X_test = pd.DataFrame(data)
X_test.set_index('nuts_code', inplace=True)
# Valores de beta a probar
beta_values = [-1, 0, 0.5, 1, 2]
# Almacenar los resultados
sensitivity_results = pd.DataFrame(index=X_test.index)
for beta in beta_values:
sensitivity_results[f'opportunity{beta}'] = X_test.apply(lambda row: generalized_mean(row.values, beta), axis=1)
# Graficar los resultados de sensibilidad para diferentes valores de beta =====================================================
plt.figure(figsize=(10, 6))
for beta in beta_values:
sns.kdeplot(sensitivity_results[f'opportunity{beta}'], label=f'beta={beta}', fill=False)
plt.title('Distribución para diferentes valores de Beta')
plt.xlabel('Dimension')
plt.ylabel('Densidad')
plt.legend()
# Guardar la figura en un archivo PNG
plt.savefig('graficos/analisis_betas_opportunity.png', dpi=300)
plt.show()
Figura 03.8
[Figura 03.9]
In [26]:
print('Figura 03.9')
print()
n_bootstrap=1000
beta = 0.5
# Realizar análisis bootstrap
bootstrap_df, results = bootstrap_analysis(X, beta, n_bootstrap=n_bootstrap)
print("Bootstrap:")
print(tabulate(results, headers='keys', tablefmt='pretty'))
print()
print('Datos originales:')
# Datos originales
varianza = subset.var()
intervalo_confianza = calcular_intervalo_confianza(subset)
print("Varianzas:")
print(varianza)
print("\nIntervalo de Confianza al 95%:")
print(intervalo_confianza)
print()
# Distribución
X['Indice']='Original'
bootstrap_df['Indice']='Bootstrap'
X_combined = X[['opportunity', 'Indice']].copy()
bootstrap_combined = bootstrap_df[['Bootstrap Index', 'Indice']].copy()
# Combinar ambos DataFrames
combined_df = pd.concat([X_combined, bootstrap_combined], axis=0)
# Crear la gráfica
plt.figure(figsize=(12, 6))
# Gráfico de densidad para los índices originales y bootstrap
sns.kdeplot(data=combined_df[combined_df['Indice'] == 'Original'], x='opportunity', label='Original', fill=True, common_norm=False)
sns.kdeplot(data=combined_df[combined_df['Indice'] == 'Bootstrap'], x='Bootstrap Index', label='Bootstrap', fill=True, common_norm=False)
# Configurar título y etiquetas
plt.title('Distribución del Índice: Original vs. Bootstrap')
plt.xlabel('Índice Compuesto')
plt.ylabel('Densidad')
# Mostrar leyenda
plt.legend(title='Tipo de Índice')
# Mostrar la gráfica
plt.show()
Figura 03.9

Bootstrap:
+---+--------------------+--------------------------+---------------------------+---------------------------+
|   |      Variance      | Confidence Interval Mean | Confidence Interval Lower | Confidence Interval Upper |
+---+--------------------+--------------------------+---------------------------+---------------------------+
| 0 | 181.28790154700897 |    50.17818901064033     |     50.12454357217139     |     50.23183444910927     |
+---+--------------------+--------------------------+---------------------------+---------------------------+

Datos originales:
Varianzas:
182.3318505325379

Intervalo de Confianza al 95%:
(50.17589410762928, 48.47459897991211, 51.87718923534646)
Dimension analysis¶
[Figura 03.10]
In [27]:
print('Figura 03.10')
dimensiones_df.describe()
Figura 03.10
Out[27]:
|  | basic_needs | foundations_wellbeing | opportunity |
|---|---|---|---|
| count | 242.000000 | 242.000000 | 242.000000 |
| mean | 68.592973 | 65.750085 | 50.175894 |
| std | 13.721110 | 14.929747 | 13.503031 |
| min | 9.982744 | 21.189226 | 18.719860 |
| 25% | 61.765380 | 56.941928 | 41.187763 |
| 50% | 71.852399 | 70.328848 | 52.057655 |
| 75% | 78.346437 | 74.363466 | 58.335474 |
| max | 91.397907 | 92.378837 | 83.273843 |
In [28]:
num_cols = dimensiones_df.select_dtypes(include=['float64', 'int64'])
dimensiones_df[num_cols.columns] = num_cols.round(2)
In [29]:
variables = [
'basic_needs',
'foundations_wellbeing',
'opportunity'
]
[Figura 03.11]
In [30]:
print('Figura 03.11')
# Boxplot
df_boxplot = dimensiones_df[variables]
plt.figure(figsize=(12, 8))
ax = sns.boxplot(data=df_boxplot, palette='tab20')
plt.xlabel('Dimensions')
plt.ylabel('Scores')
plt.title('Distribution of Scores Across EU Dimensions')
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()
Figura 03.11
[Figura 03.12]
In [31]:
print('Figura 03.12')
corr = dimensiones_df.select_dtypes(include='number').corr()
corr_melt = corr.reset_index().melt(id_vars='index')
corr_melt.columns = ['Var1', 'Var2', 'Correlation']
p = (ggplot(corr_melt, aes(x='Var1', y='Var2', fill='Correlation')) +
geom_tile() +
geom_text(aes(label=corr_melt['Correlation'].round(2)), size=8, color='black') +
scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=0) +
theme_minimal() +
theme(axis_text_x=element_text(rotation=90, hjust=1)) +
labs(x='', y='', title='Correlation Heatmap'))
print(p)
Figura 03.12
[Figura 03.13]
In [32]:
print('Figura 03.13')
num_cols = dimensiones_df.select_dtypes(include=['number']).columns
fig, axs = plt.subplots(ncols=3, nrows=1, figsize=(20, 6))
axs = axs.flatten()
index = 0
for col in num_cols:
    sns.histplot(dimensiones_df[col], bins=20, kde=True, ax=axs[index])
    index += 1
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=5.0)
Figura 03.13
[Figura 03.13]
In [33]:
print('Figura 03.13')
num_cols = dimensiones_df.select_dtypes(include=['float64', 'int64']).columns
for col in num_cols:
plt.figure(figsize=(12, 6))
sns.boxplot(x='country', y=col, data=dimensiones_df, color='#9ecae1')
mean = dimensiones_df[col].mean()
plt.axhline(mean, color='red', linestyle='--', linewidth=1, label=f'Media ({mean:.2f})')
plt.title(col)
plt.grid(True)
plt.show()
Figura 03.13
[Figura 03.14]
In [34]:
print('Figura 03.14')
# Mapas
filtered = dimensiones_gdf[~dimensiones_gdf['region'].isin(overseas)]
num_cols = filtered.select_dtypes(include=np.number).columns
for column in num_cols:
fig, axes = plt.subplots(1, 2, figsize=(24, 12)) # Crear dos subgráficos pareados
fig.patch.set_facecolor('lightgrey')
# Primer gráfico usando scheme='equal_interval'
filtered.plot(column=column,
scheme='equal_interval',
k=8,
cmap='BrBG',
edgecolor='w',
legend=True,
ax=axes[0],
legend_kwds={'loc': 'upper left', 'fontsize':'8', 'title': column}
)
axes[0].set_title(f'{column} - Equal Interval')
axes[0].set_axis_off()
# Segundo gráfico usando scheme='std_mean'
filtered.plot(column=column,
scheme='std_mean',
cmap='BrBG',
edgecolor='w',
legend=True,
ax=axes[1],
legend_kwds={'loc': 'upper left', 'fontsize':'8', 'title': column}
)
axes[1].set_title(f'{column} - Std Mean')
axes[1].set_axis_off()
plt.show()
Figura 03.14
[Figura 03.15]
In [35]:
print('Figura 03.15')
# Mapas
filtered = dimensiones_gdf[~dimensiones_gdf['region'].isin(overseas)]
num_cols = filtered.select_dtypes(include=np.number).columns
for column in num_cols:
fig, axes = plt.subplots(1, 2, figsize=(24, 12)) # Crear dos subgráficos pareados
fig.patch.set_facecolor('lightgrey')
# Primer gráfico usando scheme='quantiles'
filtered.plot(column=column,
scheme='quantiles',
k=8,
cmap='BrBG',
edgecolor='w',
legend=True,
ax=axes[0],
legend_kwds={'loc': 'upper left', 'fontsize':'8', 'title': column}
)
axes[0].set_title(f'{column} - Quantiles')
axes[0].set_axis_off()
# Segundo gráfico usando scheme='natural_breaks'
filtered.plot(column=column,
scheme='natural_breaks',
cmap='BrBG',
edgecolor='w',
legend=True,
ax=axes[1],
legend_kwds={'loc': 'upper left', 'fontsize':'8', 'title': column}
)
axes[1].set_title(f'{column} - Natural Breaks')
axes[1].set_axis_off()
plt.show()
Figura 03.15
GDP¶
In [36]:
dimensiones_df=pd.merge(dimensiones_df, df[['nuts_code','gdp_per_capita_2022']], how='left', on='nuts_code')
[Figura 03.16]
In [37]:
print('Figura 03.16')
dimensiones_df.describe()
Figura 03.16
Out[37]:
|  | basic_needs | foundations_wellbeing | opportunity | gdp_per_capita_2022 |
|---|---|---|---|---|
| count | 242.000000 | 242.000000 | 242.000000 | 242.000000 |
| mean | 68.592934 | 65.749835 | 50.175950 | 22.616020 |
| std | 13.720825 | 14.929801 | 13.503089 | 15.959313 |
| min | 9.980000 | 21.190000 | 18.720000 | 0.000000 |
| 25% | 61.767500 | 56.940000 | 41.190000 | 10.621646 |
| 50% | 71.850000 | 70.325000 | 52.060000 | 21.645796 |
| 75% | 78.345000 | 74.367500 | 58.337500 | 31.864937 |
| max | 91.400000 | 92.380000 | 83.270000 | 100.000000 |
[Figura 03.17]
In [38]:
print('Figura 03.17')
# Número de filas y columnas en el grid
n_vars = len(variables)
n_cols = 3 # Puedes ajustar esto según el número de variables
n_rows = (n_vars + n_cols - 1) // n_cols # Calcula el número de filas necesarias
# Crear el grid de gráficos de dispersión con línea de regresión
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 4 * n_rows))
# Flatten the axes array for easy iteration
axes = axes.flatten()
for ax, var in zip(axes, variables):
sns.regplot(x='gdp_per_capita_2022', y=var, data=dimensiones_df, ax=ax, scatter_kws={'s':20}, line_kws={'color':'#ff7f0e'})
ax.set_title(f'GDP vs {var}')
ax.set_xlabel('GDP Per Capita 2022')
ax.set_ylabel(var)
ax.set_xlim(0, 100) # Limitar el eje x a [0, 100]
ax.set_ylim(0, 100) # Limitar el eje y a [0, 100]
# Eliminar ejes vacíos si hay menos gráficos que subgrillas
for ax in axes[len(variables):]:
ax.remove()
plt.tight_layout()
plt.show()
Figura 03.17
[Figura 3.18]
In [39]:
from scipy.stats import pearsonr
print('Figura 3.18')
print()
# Inicializar listas para los resultados
results = {
'Variable': [],
'Correlación': [],
'p-Valor': [],
'Significancia': []
}
# Calcular correlaciones y p-valores
for var in variables:
try:
corr, p_value = pearsonr(dimensiones_df['gdp_per_capita_2022'], dimensiones_df[var])
# Determinar significancia
if p_value < 0.05:
significancia = 'Significativo'
else:
significancia = 'No Significativo'
# Evaluar dirección de la correlación
if corr > 0:
direccion = 'Positiva'
elif corr < 0:
direccion = 'Negativa'
else:
direccion = 'Nula'
# Agregar resultados a las listas
results['Variable'].append(var)
results['Correlación'].append(corr)
results['p-Valor'].append(p_value)
results['Significancia'].append(f'{significancia}, {direccion}')
except KeyError as e:
print(f"Error con la variable {var}: {e}")
# Crear un DataFrame de pandas
results_df = pd.DataFrame(results)
# Mostrar la tabla usando tabulate para una presentación más estilizada
print('GDP Per Capita 2022')
print(tabulate(results_df, headers='keys', tablefmt='fancy_grid', floatfmt=(".3f", ".3f", ".3f")))
Figura 3.18

GDP Per Capita 2022
╒════╤═══════════════════════╤═══════════════╤═════════════╤═════════════════════════╕
│    │ Variable              │   Correlación │     p-Valor │ Significancia           │
╞════╪═══════════════════════╪═══════════════╪═════════════╪═════════════════════════╡
│  0 │ basic_needs           │         0.273 │ 1.68483e-05 │ Significativo, Positiva │
├────┼───────────────────────┼───────────────┼─────────────┼─────────────────────────┤
│  1 │ foundations_wellbeing │         0.641 │ 2.15237e-29 │ Significativo, Positiva │
├────┼───────────────────────┼───────────────┼─────────────┼─────────────────────────┤
│  2 │ opportunity           │         0.726 │ 6.90448e-41 │ Significativo, Positiva │
╘════╧═══════════════════════╧═══════════════╧═════════════╧═════════════════════════╛
Save Dimensions dataset¶
In [40]:
dimensiones_df.to_csv('03_dimensiones_minmax.csv',
index = False)